
******************************************************************************
*Appendix Table 6: Individual fixed effects regression model using individuals 
*                  with 3 or 4 observations and weight adjustment
******************************************************************************

version 15.1
*work from the posted-data folder; the global ${mypath} is not set in this file and must be defined before it runs
cd "${mypath}\CHNS_project\01_data\02_posted\"

*close any log left open by an earlier (possibly aborted) run; without this, -log using- stops with "log file already open"
capture log close
log using "${mypath}\CHNS_project\03_log_files\11_appendix_table_6.log", replace

*load the imputed data produced by -ice- and convert it to -mi- format, letting -mi- register the imputed variables automatically
use CHNS_1993_2015_20_imputed.dta,clear
mi import ice, automatic

*generate exclusion condition, that is, people who work in agriculture in urban area
*(defined only for the employed; missing for everyone else)
gen be_farmer = (migrant_group != 2 & occupation == 3 & work_unit == 4) if be_employed == 1

*generate "over the statutory hours last week" (more than 44 hours)
*Stata treats missing as larger than any number, so hours_lastw1 > 44 alone would code
*unknown hours as overtime; leave the indicator missing when hours are missing
gen over_lastw = hours_lastw1 > 44 if employed_last1 == 1 & !missing(hours_lastw1)

*generate annual salary last year(log): log of (wage level * months worked + bonus level)
gen log_wage_i = log(exp(log_wage_1)*workmonths1+exp(log_bonuse_1)) if employed_last1 == 1

*generate hourly wage last year(log): log wage minus log hours, where hours =
*(365/12/7 weeks) * workdays1 * workhours1
*NOTE(review): presumably log_wage_1 is monthly and workdays1 is days per week - confirm
gen log_hour_wage = log_wage_1-log((365/12/7)*workdays1*workhours1) if employed_last1 == 1

*occupation dummies occ1, occ2, ... built from the categories observed among the employed
tabulate occupation if be_employed == 1, generate(occ)
label variable occ2 "officer"
label variable occ3 "farmer fisherman and hunter"
label variable occ4 "skilled worker"
label variable occ5 "non-skilled worker"
label variable occ6 "service worker"
label variable occ7 "other(including homemaker)"

*working-position dummies posit1, posit2, ... built the same way
tabulate position if be_employed == 1, generate(posit)
label variable posit2 "permanent worker"
label variable posit3 "contractor"
label variable posit4 "temporary worker"

*working-unit dummies: 1 for the listed work_unit codes, 0 for any other code except 0,
*and missing when work_unit == 0
generate unit2 = inlist(work_unit, 2, 3) if work_unit != 0
generate unit3 = inlist(work_unit, 4, 5) if work_unit != 0
label variable unit2 "public enterprise"
label variable unit3 "private enterprise"

*attach to each dummy a value label carrying the dummy's own name
*(no -label define- for these names appears in this file)
foreach dummy in occ2 occ3 occ4 occ5 occ6 occ7 posit2 posit3 posit4 unit2 unit3 {
	label values `dummy' `dummy'
}

*construct three indicators that reflect the pattern of survey response provided by each individual respondent
*1.1 num: number of in-sample waves in which the individual is observed
*1.2 nextwave: whether the individual's next record is in the sample
*1.3 allwave: whether the individual is observed in the maximum number of waves

*farmer_sample flags the excluded records (be_farmer among migrants / urban residents);
*insample is 1 for every other record and missing for the excluded ones
gen farmer_sample = be_farmer == 1 & (be_migrant == 1 | be_urban_resident == 1)
gen insample = 1 if farmer_sample == 0

*person-by-imputation identifier: IDind followed by the imputation number
*_mi_m is zero-padded to two digits so that the concatenation is unique
*(unpadded, IDind 12 with m = 10 and IDind 121 with m = 0 would both give 1210)
gen ID_imputed = string(IDind,"%20.0f") + string(_mi_m,"%02.0f")
destring ID_imputed,replace
format ID_imputed %20.0f

*generate the indicator of number of waves that the individual surveyed
*(wave) keeps the records of each person in wave order, which the [_n+1] lookup below relies on
bys ID_imputed (wave): egen num = total(insample) if farmer_sample == 0

*generate the indicator of whether an individual appears in the next wave
*insample[_n+1] is 1 when the following record is in the sample and missing otherwise
*(excluded record or last record of the person); missing is then recoded to 0
bys ID_imputed (wave): gen nextwave = insample[_n+1] if farmer_sample == 0 & num != 1
replace nextwave = 0 if mi(nextwave) & farmer_sample == 0 & num != 1

*generate indicator of whether an individual appear in all rounds of survey
*r(max) is the largest observed number of waves among people seen more than once
sum num if farmer_sample == 0 & num != 1
gen allwave = num == r(max) if farmer_sample == 0 & num != 1

*generate unique id considering all imputed data (a row number used later as merge key)
sort newid _mi_m
gen ID_last = _n

*build panel-attrition weights on a temporary copy of the data, restricted to
*in-sample people observed in more than one wave
preserve
keep if farmer_sample == 0 & num != 1
sort ID_imputed wave
gen ipw_all_c = .

*covariates of the retention model
local attr_x "gender ethnicity age26_30 age31_35 age36_40 age41_45 age46_50 age51_55 year_education be_rural_resident be_migrant num_child num_elderly h_group1 h_group2 h_group4 h_group5 log_pc_income HAI Liaoning Heilongjiang Shandong Henan Hubei Hunan Guangxi Guizhou"

*for every wave and every imputation (m = 0 is the original data): probit of appearing
*in the next wave, predicted probability, and its inverse as that record's weight
*NOTE(review): xtile() and prod() are not official egen functions - presumably from the
*user-written egenmore package; confirm it is installed
foreach wv of numlist 1997 2000 2004 2006 2009 2011 {
	forvalues m = 0/20 {
		local ps "ps_`wv'_`m'_c"
		quietly probit nextwave `attr_x' if _mi_m == `m' & wave == `wv'
		predict `ps', pr
		replace ipw_all_c = 1/`ps' if _mi_m == `m' & wave == `wv'
		*quintiles of the predicted probability and the mean probability within each quintile
		*(these variables are not kept in the saved weight file below)
		egen `ps'_q = xtile(`ps') if _mi_m == `m' & wave == `wv', nq(5)
		bysort `ps'_q: egen `ps'_5 = mean(`ps') if _mi_m == `m' & wave == `wv'
	}
}

*weight is 1 in the 1993 wave and 0 in the 2015 wave
replace ipw_all_c = 1 if wave == 1993
replace ipw_all_c = 0 if wave == 2015

*each record takes the weight estimated on the person's previous record (1 when there is
*none or it is missing); the panel weight is the product of these over the person's records
sort ID_imputed wave
bysort ID_imputed: gen ipw_c = cond(missing(ipw_all_c[_n-1]), 1, ipw_all_c[_n-1])
bysort ID_imputed: egen panel_ipw_c = prod(ipw_c)

keep ID_last panel_ipw_c
save panel_ipw.dta, replace
restore

*merge weight for adjusting panel attrition back onto the full data by row id
merge 1:1 ID_last using panel_ipw, nogenerate
     
*outcome variables, addressed later as `outcome_1' ... `outcome_6'
local outcome_1 "be_employed"
local outcome_2 "workmonths1"
local outcome_3 "hours_lastw1"
local outcome_4 "over_lastw"
local outcome_5 "log_wage_i"
local outcome_6 "log_hour_wage"

*key independent variables: year dummies, migrant status and their interactions
local inde "year1997 year2000 year2004 year2006 year2009 year2011 year2015 be_migrant be_migrant##year1997 be_migrant##year2000 be_migrant##year2004 be_migrant##year2006 be_migrant##year2009 be_migrant##year2011 be_migrant##year2015"

*control variables: control_2 is control_1 plus the occupation, position and work-unit dummies
local ctrl_person "age26_30 age31_35 age36_40 age41_45 age46_50 age51_55 year_education marital_status h_group1 h_group2 h_group4 h_group5"
local ctrl_job "occ2 occ3 occ4 occ5 occ6 occ7 posit2 posit3 posit4 unit2 unit3"
local ctrl_context "num_child num_elderly population pcgdp child_elder percent_farmer service_sector"
local control_1 "`ctrl_person' `ctrl_context'"
local control_2 "`ctrl_person' `ctrl_job' `ctrl_context'"

*inverse Mills ratio: probit of being employed last year, then normalden(xb)/normal(xb)
*the probit is run once on the data as loaded (not through -mi estimate-)
*NOTE(review): the probit sample uses time != 1 while lambda_urban is restricted with num != 1 - confirm intended
probit employed_last1 `inde' ///
	age26_30 age31_35 age36_40 age41_45 age46_50 age51_55 gender ethnicity ///
	year_education marital_status h_group1 h_group2 h_group4 h_group5 ///
	num_child num_elderly population pcgdp child_elder percent_farmer service_sector ///
	Liaoning Heilongjiang Shandong Henan Hubei Hunan Guangxi Guizhou ///
	if be_rural_resident == 0 & time != 1 & be_farmer != 1
predict gw_urban, xb
generate lambda_urban = normalden(gw_urban)/normal(gw_urban) ///
	if employed_last1 == 1 & be_rural_resident == 0 & num != 1 & be_farmer !=  1

*output folder for the tables
local path "${mypath}\CHNS_project\04_tables\appendix_table_6\"

*declare imputed data as panel data (panel = IDind, time = wave)
mi xtset IDind wave

*fixed effects regression model using individuals with 3 or 4 observations
*for each threshold i (time >= i): outcome 1 (employment) without the inverse Mills ratio,
*outcomes 2-6 on those employed last year with lambda_urban added
*every model is stored as outcome<j>_<i>; the title now matches the stored name for all of them
*(the employment model previously used the same title "outcome1" for both thresholds)
forv i = 3/4 {
	quiet eststo: mi estimate,post esampvaryok: xtreg `outcome_1' `inde' `control_1' if be_rural_resident == 0 & time >= `i' & be_farmer !=  1 & degree_education <= 3, fe r noomitted
	est store outcome1_`i',title(outcome1_`i')
	forv j = 2/6 {
		quiet eststo: mi estimate,post esampvaryok: xtreg `outcome_`j'' `inde' `control_1' lambda_urban if employed_last1 == 1 & be_rural_resident == 0 & time >= `i' & be_farmer !=  1 & degree_education <= 3, fe r noomitted
		est store outcome`j'_`i',title(outcome`j'_`i')
	}
}

*fixed effects models weighted by the panel attrition weight panel_ipw_c
*employment model: control_1, no inverse Mills ratio
quietly eststo: mi estimate, post esampvaryok: ///
	xtreg be_employed `inde' `control_1' ///
	if be_rural_resident == 0 & time != 1 & be_farmer !=  1 & degree_education <= 3 ///
	[pw=panel_ipw_c], fe r noomitted
estimates store outcome1_w, title(outcome1_w)

*outcomes 2-6: currently employed and employed last year, control_2 plus lambda_urban
*NOTE(review): this sample is restricted with num != 1 whereas the employment model above uses time != 1 - confirm intended
forvalues k = 2/6 {
	quietly eststo: mi estimate, post esampvaryok: ///
		xtreg `outcome_`k'' `inde' `control_2' lambda_urban ///
		if be_employed == 1 & employed_last1==1 & be_rural_resident == 0 & num != 1 & be_farmer !=  1 & degree_education <= 3 ///
		[pw=panel_ipw_c], fe r noomitted
	estimates store outcome`k'_w, title(outcome`k'_w)
}

*table 6_1: outcomes 1-4, each with the 3-observation, 4-observation and weighted model
esttab outcome1_3 outcome1_4 outcome1_w ///
	outcome2_3 outcome2_4 outcome2_w ///
	outcome3_3 outcome3_4 outcome3_w ///
	outcome4_3 outcome4_4 outcome4_w ///
	using "`path'01_appendix_table_6_1.csv", ///
	constant b(3) star(+ 0.10 * 0.05 ** 0.01) se(%9.3f) r2 replace nogap nonumbers noomitted stats(N_mi)

*table 6_2: outcomes 5-6 (log annual salary and log hourly wage), same three models each
esttab outcome5_3 outcome5_4 outcome5_w ///
	outcome6_3 outcome6_4 outcome6_w ///
	using "`path'02_appendix_table_6_2.csv", ///
	constant b(3) star(+ 0.10 * 0.05 ** 0.01) se(%9.3f) r2 replace nogap nonumbers noomitted stats(N_mi)

log close
